In [1]:
import numpy as np
import pandas as pd
import copy
import geopandas as gpd
import plotly.graph_objs as go
import plotly as py
from plotly.offline import init_notebook_mode, iplot
import matplotlib.pyplot as plt

Load state names and the corresponding abbreviation

In [2]:
states = pd.read_excel("./messy_data/states.xlsx")
states.head()
Out[2]:
State Abbreviation
0 Alabama AL
1 Alaska AK
2 Arizona AZ
3 Arkansas AR
4 California CA

Delete District of Columbia and add a Abbraviation FED for Federal because DC is underneath the Federal jurisdiction.

In [3]:
states.drop(states.index[states.Abbreviation == "DC"], inplace=True)
states.columns = ["Jurisdiction", "Jurisdiction Abbreviation"]
states = states.append({"Jurisdiction" : "Federal", "Jurisdiction Abbreviation" : "FED"}, ignore_index=True)
states.tail()
Out[3]:
Jurisdiction Jurisdiction Abbreviation
47 West Virginia WV
48 Wisconsin WI
49 Wyoming WY
50 Puerto Rico PR
51 Federal FED

Load regions and divisions

The U.S. Census Bureau divides the country into 4 regions -- Northeast, Midwest, South and West. Each region is futhre divided into several divisions.

In [4]:
regions = pd.read_excel("./messy_data/state_region.xlsx")
regions.head()
Out[4]:
State Division Region
0 Connecticut New England Northeast
1 Maine New England Northeast
2 Massachusetts New England Northeast
3 New Hampshire New England Northeast
4 Rhode Island New England Northeast

Drop District of Columbia.

In [5]:
regions.drop(regions.index[regions.State == "District of Columbia"], inplace=True)

Load polygon geodata for different states

In [6]:
usMap = gpd.read_file("./us_states_map.json")
usMap.head()
Out[6]:
GEO_ID STATE NAME LSAD CENSUSAREA geometry
0 0400000US04 04 Arizona 113594.084 POLYGON ((-112.538593 37.000674, -112.534545 3...
1 0400000US05 05 Arkansas 52035.477 POLYGON ((-94.042964 33.019219, -94.043036 33....
2 0400000US06 06 California 155779.220 (POLYGON ((-120.248484 33.999329, -120.247393 ...
3 0400000US08 08 Colorado 103641.888 POLYGON ((-107.317794 41.002957, -107.000606 4...
4 0400000US09 09 Connecticut 4842.355 POLYGON ((-72.397428 42.033302, -72.1988280000...

Drop District of Columbia and Puerto Rico.

In [7]:
usMap = usMap.loc[list(map(lambda x: x not in ["Puerto Rico", "District of Columbia"], usMap.NAME))]

Add state abbreviation and region and division to the geo-dataframe.

In [8]:
usMap = usMap.merge(states, left_on="NAME", right_on = "Jurisdiction", how="left").drop("Jurisdiction", axis=1)
usMap.columns = ["GEO_ID", "STATE", "NAME", "LSAD", "CENSUSAREA", "geometry", "Abbreviation"]
In [9]:
usMap = usMap.merge(regions, left_on="NAME", right_on = "State", how="left").drop("State", axis=1)
usMap.head()
Out[9]:
GEO_ID STATE NAME LSAD CENSUSAREA geometry Abbreviation Division Region
0 0400000US04 04 Arizona 113594.084 POLYGON ((-112.538593 37.000674, -112.534545 3... AZ Mountain West
1 0400000US05 05 Arkansas 52035.477 POLYGON ((-94.042964 33.019219, -94.043036 33.... AR West South Central South
2 0400000US06 06 California 155779.220 (POLYGON ((-120.248484 33.999329, -120.247393 ... CA Pacific West
3 0400000US08 08 Colorado 103641.888 POLYGON ((-107.317794 41.002957, -107.000606 4... CO Mountain West
4 0400000US09 09 Connecticut 4842.355 POLYGON ((-72.397428 42.033302, -72.1988280000... CT New England Northeast

Load prisoner population data

In [10]:
malePrisonerPopulation = pd.read_excel("./messy_data/Prisoners under the jurisdiction of state or federal correctional authorities 1978-2016.xlsx", sheet_name="Male", header=9, nrows=54).dropna(1, "all").dropna(0)
In [11]:
femalePrisonerPopulation = pd.read_excel("./messy_data/Prisoners under the jurisdiction of state or federal correctional authorities 1978-2016.xlsx", sheet_name="Female", header=9, nrows=54).dropna(1, "all").dropna(0)
In [12]:
totalPrisonerPopulation = pd.read_excel("./messy_data/Prisoners under the jurisdiction of state or federal correctional authorities 1978-2016.xlsx", sheet_name="Total", header=9, nrows=54).dropna(1, "all").dropna(0)
totalPrisonerPopulation.head()
Out[12]:
Jurisdiction 1978 1979 1980 1981 1982 1983 1984 1985 1986 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
1 Federal 29803.0 26371.0 24363.0 28133.0 29673.0 31926.0 34263.0 40223.0 44408.0 ... 199618 201280 208118 209771 216362 217815 215866 210567 196455 189192
3 Alabama 5625.0 5464.0 6543.0 7657.0 9233.0 9856.0 10482.0 11015.0 11710.0 ... 29412 30508 31874 31764 32270 32431 32381 31771 30810 28883
4 Alaska 712.0 760.0 822.0 1024.0 1322.0 1631.0 1967.0 2329.0 2460.0 ... 5167 5014 5285 5391 5597 5633 5081 5794 5338 4434
5 Arizona 3456.0 3749.0 4372.0 5223.0 6069.0 6889.0 7845.0 8531.0 9434.0 ... 37746 39502 40544 40209 40020 40080 41177 42259 42719 42320
6 Arkansas 2654.0 3042.0 2911.0 3328.0 3922.0 4246.0 4482.0 4611.0 4701.0 ... 14314 14716 15208 16204 16108 14654 17235 17874 17707 17537

5 rows × 40 columns

Clean and melt the population tables

In [13]:
def cleanAndMeltPopulationTable(table, states):
    
    # add DC data to Federal, and delete it
    table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] = table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] + table.loc[table.Jurisdiction == "District of Columbia"].iloc[:, 1:].replace("--", 0)
    table.drop(table.index[table.Jurisdiction == "District of Columbia"], inplace=True)

    table = table.melt(id_vars = "Jurisdiction", var_name = "Year", value_name = "Population")
    table.Year = table.Year.astype(int)
    
    table = states.merge(table, on="Jurisdiction")
    
    return table
In [14]:
meltedMalePrisonerPopulation = cleanAndMeltPopulationTable(malePrisonerPopulation, states)
meltedFemalePrisonerPopulation = cleanAndMeltPopulationTable(femalePrisonerPopulation, states)
meltedTotalPrisonerPopulation = cleanAndMeltPopulationTable(totalPrisonerPopulation, states)

meltedTotalPrisonerPopulation.head()
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

Out[14]:
Jurisdiction Jurisdiction Abbreviation Year Population
0 Alabama AL 1978 5625
1 Alabama AL 1979 5464
2 Alabama AL 1980 6543
3 Alabama AL 1981 7657
4 Alabama AL 1982 9233

Aggregate and sum the state prisoner population for regions and divisions

In [15]:
def aggregateRegionalPopulationSum(table, regions):
    
    table = regions.merge(table, left_on="State", right_on="Jurisdiction", how="right")
    temp_index = table["Jurisdiction"] == "Federal"
    table.loc[temp_index, "Region"] = "Federal"
    table.loc[temp_index, "Division"] = "Federal"
    
    regionSum = table.groupby(["Year", "Region"]).Population.sum()
    divisionSum = table.groupby(["Year", "Region", "Division"]).Population.sum()
    
    return regionSum, divisionSum
In [16]:
malePrisonerPopulationRegionSum, malePrisonerPopulationDivisionSum = aggregateRegionalPopulationSum(meltedMalePrisonerPopulation, regions)
femalePrisonerPopulationRegionSum, femalePrisonerPopulationDivisionSum = aggregateRegionalPopulationSum(meltedFemalePrisonerPopulation, regions)
totalPrisonerPopulationRegionSum, totalPrisonerPopulationDivisionSum = aggregateRegionalPopulationSum(meltedTotalPrisonerPopulation, regions)

Plot maps illustrating the prisoner population in different states

In [17]:
def plotAnnualStatePopulation(table, gender, years_chosen):
    
    table = table[table.Jurisdiction != "Federal"]

    cmax = table.Population.max()
    cmin = table.Population.min()
    
    years = table.Year.unique()

    heatMapData = [{"type" : "choropleth",  "locations" : table.loc[table.Year == year, "Jurisdiction Abbreviation"], "locationmode" : "USA-states", "colorscale" : "Viridis", "zmin" : cmin, "zmax" : cmax, "z" : table.Population[table.Year == year].astype(float)} for year in years]
    mapLayout = [{"geo" : {"scope" : 'usa'}, "title" : gender + " Prisoner Population by State, " + str(year)} for year in years]
    
    for i, data in enumerate(heatMapData):
        if years[i] in years_chosen:
            usHeatMap = go.Figure(data=[data], layout=mapLayout[i])
            iplot(usHeatMap)
In [18]:
# gender = "Total"

# cmax = table.Population.max()
# cmin = table.Population.min()

# heatMapData = [dict(type = "choropleth", locations=table.loc[table.Year == year, "Jurisdiction Abbreviation"], locationmode="USA-states", colorscale="Viridis", zmin=cmin, zmax=cmax, z=table.Population[table.Year == year].astype(float), text = str(year), geo = 'geo'+str(i+1) if i != 0 else 'geo') for i, year in enumerate(years)]
# mapLayout = {"geo" + (str(i + 1) if i != 0 else "" ) : {"scope" : 'usa', "domain" : dict(x=[0, 1], y=[i / len(years), (i + 1) / len(years)])} for i in range(len(years))}
# fig = {'data' : heatMapData, 'layout' : mapLayout}

# iplot(fig)
In [19]:
init_notebook_mode()
In [20]:
plotAnnualStatePopulation(meltedMalePrisonerPopulation, "Male", [1978, 2016])
In [21]:
plotAnnualStatePopulation(meltedFemalePrisonerPopulation, "Female", [1978, 2016])
In [22]:
plotAnnualStatePopulation(meltedTotalPrisonerPopulation, "Total", [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015])

Plot maps illustrating priosoner population in different regions and divisions

In [23]:
def plotGeoPandasUSMap(usMap, title=None, dataColumn=None, dataLimit=(None, None)):
    
    fig, mapAx = plt.subplots(1, 1)
    
    mapLimitW, mapLimitE, mapLimitS, mapLimitN = -185, -65, 15, 75
    mapAx.axis((mapLimitW, mapLimitE, mapLimitS, mapLimitN))
    mapAx.axis('off')
    mapAx.set_aspect('equal', 'box')
    mapAx.set_title(title)
    
    usMap.plot(column=dataColumn, edgecolor="k", ax=mapAx, legend=True, vmin=dataLimit[0], vmax=dataLimit[1]) #, figsize=(20,20))
    
#     sm = plt.cm.ScalarMappable(cmap='viridis_r', norm=plt.Normalize(vmin=dataLimit[0], vmax=dataLimit[1]))
#     sm._A = []
#     cbar = mapAx.colorbar(sm)
In [24]:
def plotRegionalPopulation(table, usMap, gender, years_chosen):
    
    regionType = table.index.names[1]

    tableWOFederal = table[table.index.get_level_values(1) != "Federal"]

    years = table.index.get_level_values(0).unique()
    dividedTableWOFederal = [tableWOFederal[year] for year in years]

    usRegionMap = usMap[[regionType, "geometry"]].dissolve(regionType)

    colorlimit = (tableWOFederal.min(), tableWOFederal.max())
    for i, oneYearTable in enumerate(dividedTableWOFederal):
        year = years[i]
        
        if year in years_chosen:

            oneYearTable = usRegionMap.join(oneYearTable)

            figtitle = gender + " Prisoner Population by " + regionType + ", " + str(year)
            plotGeoPandasUSMap(oneYearTable, figtitle, "Population", colorlimit)


    figtitle = gender + " Prisoner Population by " + regionType
    ax = table.unstack().plot(title = figtitle)
    ax.set_ylim(bottom=0)
    ax.tick_params(axis = "y", length=0)
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), fancybox=True, shadow=True, ncol=len(table.index.get_level_values(1).unique()))
In [25]:
%matplotlib inline
plt.rcParams["figure.figsize"] = (10,5)

Plot regions

In [26]:
plotRegionalPopulation(malePrisonerPopulationRegionSum, usMap, "Male", [1978, 2016])
In [27]:
plotRegionalPopulation(femalePrisonerPopulationRegionSum, usMap, "Female", [1978, 2016])
In [28]:
plotRegionalPopulation(totalPrisonerPopulationRegionSum, usMap, "Total", [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015])

Plot Divisions

In [29]:
def dropRegionIndex(table):
    table = copy.deepcopy(table)
    
    table.index = table.index.droplevel(1)
    return table
In [30]:
plotRegionalPopulation(dropRegionIndex(malePrisonerPopulationDivisionSum), usMap, "Male", [1978, 2016])
In [31]:
plotRegionalPopulation(dropRegionIndex(femalePrisonerPopulationDivisionSum), usMap, "Female", [1978, 2016])
In [32]:
plotRegionalPopulation(dropRegionIndex(totalPrisonerPopulationDivisionSum), usMap, "Total", [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015])

Load facility occupancy data

In [33]:
custodyPopulation_11_16 = pd.read_excel("./messy_data/Prison occupation rate.xlsx", "Custody Population", na_values="/")

occupancyLowRate_11_16 = pd.read_excel("./messy_data/Prison occupation rate.xlsx", "Highest Capacity", na_values="/")
occupancyHighRate_11_16 = pd.read_excel("./messy_data/Prison occupation rate.xlsx", "Lowest Capacity", na_values="/")
In [34]:
custodyPopulation_11_16.head()
Out[34]:
Jurisdiction 2011 2012 2013 2014 2015 2016
0 Federal 176228 176658 174242.0 169840 160946 154339
1 Alabama 26268 26230 26271.0 25664 24814 23397
2 Alaska 3708 4575 5054.0 5188 5247 4378
3 Arizona 33492 33578 34626.0 35181 35733 42248
4 Arkansas 14090 14043 14295.0 15250 15784 15833

Two occupancy rates were reported -- low estimation and high estimation

In [35]:
occupancyHighRate_11_16.head()
Out[35]:
Jurisdiction 2011 2012 2013 2014 2015 2016
0 Federal 138.0 137.3 133.103654 127.958050 119.7 114.0
1 Alabama 196.0 195.7 197.259348 192.701607 186.3 175.7
2 Alaska 121.0 149.6 NaN 96.935725 98.0 90.5
3 Arizona 90.0 91.5 94.397645 93.365357 96.0 108.6
4 Arkansas 105.0 101.3 102.952827 98.839847 102.6 101.3
In [36]:
occupancyHighRate_11_16.head()
Out[36]:
Jurisdiction 2011 2012 2013 2014 2015 2016
0 Federal 138.0 137.3 133.103654 127.958050 119.7 114.0
1 Alabama 196.0 195.7 197.259348 192.701607 186.3 175.7
2 Alaska 121.0 149.6 NaN 96.935725 98.0 90.5
3 Arizona 90.0 91.5 94.397645 93.365357 96.0 108.6
4 Arkansas 105.0 101.3 102.952827 98.839847 102.6 101.3

Clean and melt tables

In [37]:
def cleanAndMeltCustodyOccupancyTable(table, states, tableType):
    
    table = table.melt(id_vars = "Jurisdiction", var_name = "Year", value_name = tableType)
    if tableType == "Occupancy":
        table.Occupancy = table.Occupancy / 100
    
    table.Year = table.Year.astype(int)
    table = states.merge(table, on="Jurisdiction")
    
    return table
In [38]:
meltedCustodyPopulation_11_16 = cleanAndMeltCustodyOccupancyTable(custodyPopulation_11_16, states, "Population")
meltedOccupancyLowRate_11_16 = cleanAndMeltCustodyOccupancyTable(occupancyLowRate_11_16, states, "Occupancy")
meltedOccupancyHighRate_11_16 = cleanAndMeltCustodyOccupancyTable(occupancyHighRate_11_16, states, "Occupancy")
In [39]:
meltedCustodyPopulation_11_16.head()
Out[39]:
Jurisdiction Jurisdiction Abbreviation Year Population
0 Alabama AL 2011 26268.0
1 Alabama AL 2012 26230.0
2 Alabama AL 2013 26271.0
3 Alabama AL 2014 25664.0
4 Alabama AL 2015 24814.0
In [40]:
def plotAnnualStateOccupancy(table, low_or_high, years_chosen):
    
    table = table[table.Occupancy != "Federal"]

    cmax = table.Occupancy.max()
    cmin = table.Occupancy.min()
    
    years = table.Year.unique()

    heatMapData = [{"type" : "choropleth",  "locations" : table.loc[table.Year == year, "Jurisdiction Abbreviation"], "locationmode" : "USA-states", "colorscale" : "Viridis", "zmin" : cmin, "zmax" : cmax, "z" : table.Occupancy[table.Year == year].astype(float)} for year in years]
    mapLayout = [{"geo" : {"scope" : 'usa'}, "title" : "Prison Occupancy (" + low_or_high + " Estimation) by State, " + str(year)} for year in years]
    
    for i, data in enumerate(heatMapData):
        if years[i] in years_chosen:
            usHeatMap = go.Figure(data=[data], layout=mapLayout[i])
            iplot(usHeatMap)

Plot occupancy maps for states

In [41]:
plotAnnualStateOccupancy(meltedOccupancyLowRate_11_16, "Low", [2011, 2016])
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/pandas/core/ops.py:1649: FutureWarning:

elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison

In [42]:
plotAnnualStateOccupancy(meltedOccupancyHighRate_11_16, "High", [2011, 2016])

Calculate regional and divisional occupancy

In [43]:
def aggregateRegionalOccupancy(table, populationTable, regions):
    
    table = regions.merge(table.dropna(), left_on="State", right_on="Jurisdiction", how="right")
    table = table.merge(populationTable[["Jurisdiction", "Year", "Population"]].dropna(), on=["Jurisdiction", "Year"], how="left")

    temp_index = table["Jurisdiction"] == "Federal"
    table.loc[temp_index, "Region"] = "Federal"
    table.loc[temp_index, "Division"] = "Federal"
    table["Capacity"] = table.Population / table.Occupancy
    
    regionCapacity = table.groupby(["Year", "Region"]).Capacity.sum()
    divisionCapacity = table.groupby(["Year", "Region", "Division"]).Capacity.sum()
    
    regionPopulation = table.groupby(["Year", "Region"]).Population.sum()
    divisionPopulation = table.groupby(["Year", "Region", "Division"]).Population.sum()
    
    regionOccupancy = (regionPopulation / regionCapacity).rename("Occupancy")
    divisionOccupancy = (divisionPopulation / divisionCapacity).rename("Occupancy")
    
    return regionOccupancy, divisionOccupancy
In [44]:
regionLowOccupancy, divisionLowOccupancy = aggregateRegionalOccupancy(meltedOccupancyLowRate_11_16, meltedCustodyPopulation_11_16, regions)
regionHighOccupancy, divisionHighOccupancy = aggregateRegionalOccupancy(meltedOccupancyHighRate_11_16, meltedCustodyPopulation_11_16, regions)

Plot maps for regions and divisions

In [45]:
def plotRegionalOccupancy(table, usMap, low_or_high, years_chosen):
    
    regionType = table.index.names[1]

    tableWOFederal = table[table.index.get_level_values(1) != "Federal"]

    years = table.index.get_level_values(0).unique()
    dividedTableWOFederal = [tableWOFederal[year] for year in years]

    usRegionMap = usMap[[regionType, "geometry"]].dissolve(regionType)

    colorlimit = (tableWOFederal.min(), tableWOFederal.max())
    for i, oneYearTable in enumerate(dividedTableWOFederal):
        year = years[i]
        
        if year in years_chosen:
        
            oneYearTable = usRegionMap.join(oneYearTable)

            figtitle = "Prison Occupancy (" + low_or_high + " Estimate) by " + regionType + ", " + str(year)
            plotGeoPandasUSMap(oneYearTable, figtitle, "Occupancy", colorlimit)


    figtitle = "Prison Occupancy (" + low_or_high + " Estimate) by " + regionType
    ax = table.unstack().plot(title = figtitle)
    ax.tick_params(axis = "y", length=0)
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), fancybox=True, shadow=True, ncol=len(table.index.get_level_values(1).unique()))

Plot regions

In [46]:
plotRegionalOccupancy(regionLowOccupancy, usMap, "Low", [2011, 2016])
In [47]:
plotRegionalOccupancy(regionHighOccupancy, usMap, "High", [2011, 2016])

Plot divisions

In [48]:
plotRegionalOccupancy(dropRegionIndex(divisionLowOccupancy), usMap, "Low", [2011, 2016])
In [49]:
plotRegionalOccupancy(dropRegionIndex(divisionHighOccupancy), usMap, "High", [2011, 2016])

Load admission and release data

Load admission

In [50]:
totalAdmission = pd.read_excel("./messy_data/admissions.xlsx", sheet_name="Total").dropna().replace("/", np.nan)
totalAdmission.head()
Out[50]:
Jurisdiction 1978 1979 1980 1981 1982 1983 1984 1985 1986 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
1 Federal 14724.0 14120.0 12598.0 12830.0 14818.0 16745.0 16013.0 17916.0 18501.0 ... 53618 53662 56153 54121 60634 55938 53664 50865 46912 44682
3 Alabama 2572.0 2597.0 3766.0 4025.0 4425.0 4605.0 4701.0 4370.0 3962.0 ... 10708 11037 13093 11881 11387 11203 11265 10912 10451 10749
4 Alaska 258.0 311.0 459.0 461.0 541.0 711.0 727.0 875.0 1097.0 ... NaN NaN NaN 2650 3789 3906 3906 3846 4271 1804
5 Arizona 1620.0 1641.0 2082.0 2759.0 2910.0 3288.0 3386.0 3989.0 4515.0 ... 14046 14867 14526 13249 13030 12970 13538 14439 14670 13663
6 Arkansas 1958.0 2189.0 2311.0 2419.0 2323.0 2173.0 2179.0 2301.0 2280.0 ... 6651 7017 7383 7603 7059 5782 8987 9435 9351 9911

5 rows × 40 columns

In [51]:
maleAdmission = pd.read_excel("./messy_data/admissions.xlsx", sheet_name="Male").dropna().replace("/", np.nan)
femaleAdmission = pd.read_excel("./messy_data/admissions.xlsx", sheet_name="Female").dropna().replace("/", np.nan)

Clean and melt tables

In [52]:
def cleanAndMeltCountTable(table, states):
    
    # add DC data to Federal, and delete it
    table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] = table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] + table.loc[table.Jurisdiction == "District of Columbia"].iloc[:, 1:].replace("--", 0)
    table.drop(table.index[table.Jurisdiction == "District of Columbia"], inplace=True)

    table = table.melt(id_vars = "Jurisdiction", var_name = "Year", value_name = "Count")
    table.Year = table.Year.astype(int)
    
    table = states.merge(table, on="Jurisdiction")
    
    return table
In [53]:
meltedTotalAdmission = cleanAndMeltCountTable(totalAdmission, states)
In [54]:
meltedMaleAdmission = cleanAndMeltCountTable(maleAdmission, states)
meltedFemaleAdmission = cleanAndMeltCountTable(femaleAdmission, states)
In [55]:
meltedTotalAdmission.head()
Out[55]:
Jurisdiction Jurisdiction Abbreviation Year Count
0 Alabama AL 1978 2572
1 Alabama AL 1979 2597
2 Alabama AL 1980 3766
3 Alabama AL 1981 4025
4 Alabama AL 1982 4425

Load release

In [56]:
totalRelease = pd.read_excel("./messy_data/releases.xlsx", sheet_name="Total").dropna().replace("/", np.nan)
maleRelease = pd.read_excel("./messy_data/releases.xlsx", sheet_name="Male").dropna().replace("/", np.nan)
femaleRelease = pd.read_excel("./messy_data/releases.xlsx", sheet_name="Female").dropna().replace("/", np.nan)

Clean and melt tables

In [57]:
meltedTotalRelease = cleanAndMeltCountTable(totalRelease, states)
meltedMaleRelease = cleanAndMeltCountTable(maleRelease, states)
meltedFemaleRelease = cleanAndMeltCountTable(femaleRelease, states)
In [ ]:
 
In [ ]: